#!pip install numpy
#!pip install pandas
#!pip install matplotlib
#!pip install seaborn
#!pip install plotly
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# for better interactive visualization
import plotly.graph_objects as go
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')
import os
# List the dataset directory. os.listdir() returns entries in arbitrary order,
# so sort case-insensitively to give every entry a stable position between runs.
files = sorted(
    os.listdir('C:\\Users\\manje\\Downloads\\Projects\\Covid-19'),
    key=str.lower,
)
files
['.ipynb_checkpoints', 'country_wise_latest.csv', 'covid_19_clean_complete.csv', 'Covid_19_project.ipynb', 'day_wise.csv', 'full_grouped.csv', 'usa_country_wise.csv', 'worldometer_data.csv']
### Let's create a helper function to simplify our task, since we have to read data again and again
def read_data(path, filename):
    """Load one CSV file from a directory into a DataFrame.

    Args:
        path: Directory that contains the file.
        filename: Name of the CSV file inside ``path``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    # os.path.join adds a separator only where one is needed, so a directory
    # passed with a trailing separator does not produce a doubled one.
    return pd.read_csv(os.path.join(path, filename))
# Directory that holds the dataset CSV files.
path = 'C:\\Users\\manje\\Downloads\\Projects\\Covid-19'
files
['.ipynb_checkpoints', 'country_wise_latest.csv', 'covid_19_clean_complete.csv', 'Covid_19_project.ipynb', 'day_wise.csv', 'full_grouped.csv', 'usa_country_wise.csv', 'worldometer_data.csv']
# Load the worldometer snapshot by naming its CSV file explicitly.
world_data = read_data(path=path, filename='worldometer_data.csv')
world_data
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 204 | Montserrat | North America | 4.992000e+03 | 13 | NaN | 1.0 | NaN | 10.0 | NaN | 2.0 | NaN | 2604.0 | 200.0 | 61.0 | 12220.0 | NaN |
| 205 | Caribbean Netherlands | North America | 2.624700e+04 | 13 | NaN | NaN | NaN | 7.0 | NaN | 6.0 | NaN | 495.0 | NaN | 424.0 | 16154.0 | NaN |
| 206 | Falkland Islands | South America | 3.489000e+03 | 13 | NaN | NaN | NaN | 13.0 | NaN | 0.0 | NaN | 3726.0 | NaN | 1816.0 | 520493.0 | NaN |
| 207 | Vatican City | Europe | 8.010000e+02 | 12 | NaN | NaN | NaN | 12.0 | NaN | 0.0 | NaN | 14981.0 | NaN | NaN | NaN | Europe |
| 208 | Western Sahara | Africa | 5.986820e+05 | 10 | NaN | 1.0 | NaN | 8.0 | NaN | 1.0 | NaN | 17.0 | 2.0 | NaN | NaN | Africa |
209 rows × 16 columns
# Load the per-country daily time series. The file is named explicitly instead
# of being picked with files[5]: os.listdir() order is arbitrary, and the
# position shifts whenever a file is added to or removed from the directory.
group_data = read_data(path, 'full_grouped.csv')
group_data
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Eastern Mediterranean |
| 1 | 2020-01-22 | Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 2 | 2020-01-22 | Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
| 3 | 2020-01-22 | Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 4 | 2020-01-22 | Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 35151 | 2020-07-27 | West Bank and Gaza | 10621 | 78 | 3752 | 6791 | 152 | 2 | 0 | Eastern Mediterranean |
| 35152 | 2020-07-27 | Western Sahara | 10 | 1 | 8 | 1 | 0 | 0 | 0 | Africa |
| 35153 | 2020-07-27 | Yemen | 1691 | 483 | 833 | 375 | 10 | 4 | 36 | Eastern Mediterranean |
| 35154 | 2020-07-27 | Zambia | 4552 | 140 | 2815 | 1597 | 71 | 1 | 465 | Africa |
| 35155 | 2020-07-27 | Zimbabwe | 2704 | 36 | 542 | 2126 | 192 | 2 | 24 | Africa |
35156 rows × 10 columns
# Load the USA time series. The file is named explicitly instead of being
# picked with files[6], whose meaning depends on the directory listing order
# and contents.
usa_data = read_data(path, 'usa_country_wise.csv')
usa_data
| UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | Combined_Key | Date | Confirmed | Deaths | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16 | AS | ASM | 16 | 60.0 | NaN | American Samoa | US | -14.271000 | -170.132000 | American Samoa, US | 1/22/20 | 0 | 0 |
| 1 | 316 | GU | GUM | 316 | 66.0 | NaN | Guam | US | 13.444300 | 144.793700 | Guam, US | 1/22/20 | 0 | 0 |
| 2 | 580 | MP | MNP | 580 | 69.0 | NaN | Northern Mariana Islands | US | 15.097900 | 145.673900 | Northern Mariana Islands, US | 1/22/20 | 0 | 0 |
| 3 | 63072001 | PR | PRI | 630 | 72001.0 | Adjuntas | Puerto Rico | US | 18.180117 | -66.754367 | Adjuntas, Puerto Rico, US | 1/22/20 | 0 | 0 |
| 4 | 63072003 | PR | PRI | 630 | 72003.0 | Aguada | Puerto Rico | US | 18.360255 | -67.175131 | Aguada, Puerto Rico, US | 1/22/20 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 627915 | 84070016 | US | USA | 840 | NaN | Central Utah | Utah | US | 39.372319 | -111.575868 | Central Utah, Utah, US | 7/27/20 | 347 | 1 |
| 627916 | 84070017 | US | USA | 840 | NaN | Southeast Utah | Utah | US | 38.996171 | -110.701396 | Southeast Utah, Utah, US | 7/27/20 | 70 | 0 |
| 627917 | 84070018 | US | USA | 840 | NaN | Southwest Utah | Utah | US | 37.854472 | -111.441876 | Southwest Utah, Utah, US | 7/27/20 | 2781 | 23 |
| 627918 | 84070019 | US | USA | 840 | NaN | TriCounty | Utah | US | 40.124915 | -109.517442 | TriCounty, Utah, US | 7/27/20 | 142 | 0 |
| 627919 | 84070020 | US | USA | 840 | NaN | Weber-Morgan | Utah | US | 41.271160 | -111.914512 | Weber-Morgan, Utah, US | 7/27/20 | 2375 | 24 |
627920 rows × 14 columns
# Load the latest per-country summary. The file is named explicitly instead of
# being picked with files[1], whose meaning depends on the directory listing
# order and contents.
province_data = read_data(path, 'country_wise_latest.csv')
province_data
| Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Deaths / 100 Cases | Recovered / 100 Cases | Deaths / 100 Recovered | Confirmed last week | 1 week change | 1 week % increase | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 36263 | 1269 | 25198 | 9796 | 106 | 10 | 18 | 3.50 | 69.49 | 5.04 | 35526 | 737 | 2.07 | Eastern Mediterranean |
| 1 | Albania | 4880 | 144 | 2745 | 1991 | 117 | 6 | 63 | 2.95 | 56.25 | 5.25 | 4171 | 709 | 17.00 | Europe |
| 2 | Algeria | 27973 | 1163 | 18837 | 7973 | 616 | 8 | 749 | 4.16 | 67.34 | 6.17 | 23691 | 4282 | 18.07 | Africa |
| 3 | Andorra | 907 | 52 | 803 | 52 | 10 | 0 | 0 | 5.73 | 88.53 | 6.48 | 884 | 23 | 2.60 | Europe |
| 4 | Angola | 950 | 41 | 242 | 667 | 18 | 1 | 0 | 4.32 | 25.47 | 16.94 | 749 | 201 | 26.84 | Africa |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 182 | West Bank and Gaza | 10621 | 78 | 3752 | 6791 | 152 | 2 | 0 | 0.73 | 35.33 | 2.08 | 8916 | 1705 | 19.12 | Eastern Mediterranean |
| 183 | Western Sahara | 10 | 1 | 8 | 1 | 0 | 0 | 0 | 10.00 | 80.00 | 12.50 | 10 | 0 | 0.00 | Africa |
| 184 | Yemen | 1691 | 483 | 833 | 375 | 10 | 4 | 36 | 28.56 | 49.26 | 57.98 | 1619 | 72 | 4.45 | Eastern Mediterranean |
| 185 | Zambia | 4552 | 140 | 2815 | 1597 | 71 | 1 | 465 | 3.08 | 61.84 | 4.97 | 3326 | 1226 | 36.86 | Africa |
| 186 | Zimbabwe | 2704 | 36 | 542 | 2126 | 192 | 2 | 24 | 1.33 | 20.04 | 6.64 | 1713 | 991 | 57.85 | Africa |
187 rows × 15 columns
# Load the worldwide day-by-day totals. The file is named explicitly instead of
# being picked with files[4], whose meaning depends on the directory listing
# order and contents.
day_wise = read_data(path, 'day_wise.csv')
day_wise
| Date | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Deaths / 100 Cases | Recovered / 100 Cases | Deaths / 100 Recovered | No. of countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | 555 | 17 | 28 | 510 | 0 | 0 | 0 | 3.06 | 5.05 | 60.71 | 6 |
| 1 | 2020-01-23 | 654 | 18 | 30 | 606 | 99 | 1 | 2 | 2.75 | 4.59 | 60.00 | 8 |
| 2 | 2020-01-24 | 941 | 26 | 36 | 879 | 287 | 8 | 6 | 2.76 | 3.83 | 72.22 | 9 |
| 3 | 2020-01-25 | 1434 | 42 | 39 | 1353 | 493 | 16 | 3 | 2.93 | 2.72 | 107.69 | 11 |
| 4 | 2020-01-26 | 2118 | 56 | 52 | 2010 | 684 | 14 | 13 | 2.64 | 2.46 | 107.69 | 13 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 183 | 2020-07-23 | 15510481 | 633506 | 8710969 | 6166006 | 282756 | 9966 | 169714 | 4.08 | 56.16 | 7.27 | 187 |
| 184 | 2020-07-24 | 15791645 | 639650 | 8939705 | 6212290 | 281164 | 6144 | 228736 | 4.05 | 56.61 | 7.16 | 187 |
| 185 | 2020-07-25 | 16047190 | 644517 | 9158743 | 6243930 | 255545 | 4867 | 219038 | 4.02 | 57.07 | 7.04 | 187 |
| 186 | 2020-07-26 | 16251796 | 648621 | 9293464 | 6309711 | 204606 | 4104 | 134721 | 3.99 | 57.18 | 6.98 | 187 |
| 187 | 2020-07-27 | 16480485 | 654036 | 9468087 | 6358362 | 228693 | 5415 | 174623 | 3.97 | 57.45 | 6.91 | 187 |
188 rows × 12 columns
# Show the column names of the worldometer dataset.
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
# Draw one treemap per headline metric for the first 20 rows of world_data.
columns = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
for metric in columns:
    fig = px.treemap(
        world_data[0:20],
        values=metric,
        path=['Country/Region'],
        template="plotly_dark",
        title=f"<b>TreeMap representation of different Countries w.r.t. their {metric}</b>",
    )
    fig.show()
# Plot the day_wise totals against the date, one line per case category.
fig = px.line(
    day_wise,
    x="Date",
    y=["Confirmed", "Deaths", "Recovered", "Active"],
    title="covid cases w.r.t. date",
    template="plotly_dark",
)
fig.show()
# Population divided by total tests, for the first 10 rows of world_data.
pop_test_ratio = world_data['Population'].iloc[0:10] / world_data['TotalTests'].iloc[0:10]
pop_test_ratio
0 5.245489 1 16.106896 2 62.365033 3 4.911040 4 18.852446 5 122.115932 6 13.241331 7 10.866949 8 28.269105 9 6.618696 dtype: float64
# Bar chart of the population-to-tests ratio for the first 10 rows, one colour per country.
fig = px.bar(
    world_data.iloc[0:10],
    x='Country/Region',
    y=pop_test_ratio,
    color='Country/Region',
    template="plotly_dark",
    title="<b>Population to Tests done ratio's</b>",
)
fig.show()
# Bar chart of the case categories for the first 10 rows of world_data.
fig = px.bar(
    world_data.iloc[0:10],
    x='Country/Region',
    y=['Serious,Critical', 'TotalDeaths', 'TotalRecovered', 'ActiveCases', 'TotalCases'],
    template="plotly",
)
# The x-axis is the country, not a date, so the title names the country axis
# (the previous title said "w.r.t. time").
fig.update_layout({'title': "Coronavirus cases w.r.t. country"})
fig.show()
# Preview the first rows of the worldometer dataset.
world_data.head()
| Country/Region | Continent | Population | TotalCases | NewCases | TotalDeaths | NewDeaths | TotalRecovered | NewRecovered | ActiveCases | Serious,Critical | Tot Cases/1M pop | Deaths/1M pop | TotalTests | Tests/1M pop | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | USA | North America | 3.311981e+08 | 5032179 | NaN | 162804.0 | NaN | 2576668.0 | NaN | 2292707.0 | 18296.0 | 15194.0 | 492.0 | 63139605.0 | 190640.0 | Americas |
| 1 | Brazil | South America | 2.127107e+08 | 2917562 | NaN | 98644.0 | NaN | 2047660.0 | NaN | 771258.0 | 8318.0 | 13716.0 | 464.0 | 13206188.0 | 62085.0 | Americas |
| 2 | India | Asia | 1.381345e+09 | 2025409 | NaN | 41638.0 | NaN | 1377384.0 | NaN | 606387.0 | 8944.0 | 1466.0 | 30.0 | 22149351.0 | 16035.0 | South-EastAsia |
| 3 | Russia | Europe | 1.459409e+08 | 871894 | NaN | 14606.0 | NaN | 676357.0 | NaN | 180931.0 | 2300.0 | 5974.0 | 100.0 | 29716907.0 | 203623.0 | Europe |
| 4 | South Africa | Africa | 5.938157e+07 | 538184 | NaN | 9604.0 | NaN | 387316.0 | NaN | 141264.0 | 539.0 | 9063.0 | 162.0 | 3149807.0 | 53044.0 | Africa |
# Count the distinct values in the 'Country/Region' column.
world_data['Country/Region'].nunique()
209
# Top 20 countries by total confirmed cases. Sort explicitly, like the other
# top-20 charts, instead of relying on the row order of the CSV file.
fig = px.bar(
    world_data.sort_values(by='TotalCases', ascending=False)[0:20],
    y='Country/Region',
    x='TotalCases',
    color='TotalCases',
    text="TotalCases",
)
fig.update_layout(template="plotly_dark", title_text="<b>Top 20 countries of Total confirmed cases</b>")
fig.show()
# Horizontal bar chart of the 20 rows with the highest TotalDeaths.
fig = px.bar(
    world_data.sort_values(by='TotalDeaths', ascending=False)[0:20],
    x='TotalDeaths',
    y='Country/Region',
    color='TotalDeaths',
    text="TotalDeaths",
)
fig.update_layout(
    template="plotly_dark",
    title_text="<b>Top 20 countries of Total deaths</b>",
)
fig.show()
# Horizontal bar chart of the 20 rows with the highest ActiveCases.
fig = px.bar(
    world_data.sort_values(by='ActiveCases', ascending=False)[0:20],
    y='Country/Region',
    x='ActiveCases',
    color='ActiveCases',
    text='ActiveCases',
)
# The title's <b> tag is now closed, matching the sibling charts.
fig.update_layout(template="plotly_dark", title_text="<b>Top 20 countries of Total Active cases</b>")
fig.show()
# Horizontal bar chart of the 20 rows with the highest TotalRecovered.
fig = px.bar(
    world_data.sort_values(by='TotalRecovered', ascending=False)[:20],
    y='Country/Region',
    x='TotalRecovered',
    color='TotalRecovered',
    text='TotalRecovered',
)
# The title's <b> tag is now closed, matching the sibling charts.
fig.update_layout(template="plotly_dark", title_text="<b>Top 20 countries of Total Recovered</b>")
fig.show()
# Show the column names of the worldometer dataset again for reference.
world_data.columns
Index(['Country/Region', 'Continent', 'Population', 'TotalCases', 'NewCases',
'TotalDeaths', 'NewDeaths', 'TotalRecovered', 'NewRecovered',
'ActiveCases', 'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop',
'TotalTests', 'Tests/1M pop', 'WHO Region'],
dtype='object')
# Country names of the first 15 rows of world_data, as a NumPy array.
world_data[0:15]['Country/Region'].values
array(['USA', 'Brazil', 'India', 'Russia', 'South Africa', 'Mexico',
'Peru', 'Chile', 'Colombia', 'Spain', 'Iran', 'UK', 'Saudi Arabia',
'Pakistan', 'Bangladesh'], dtype=object)
# One donut chart per metric, showing each of the first 15 rows of world_data
# as a slice labelled with its country name.
labels = world_data[0:15]['Country/Region'].values
cases = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
for i in cases:
    fig = px.pie(
        world_data[0:15],
        values=i,
        names=labels,
        template="plotly_dark",
        hole=0.3,
        # The slices are countries, not WHO regions, so the title says so; this
        # also fixes the "Recordeded" typo and the doubled "w.r.t. to".
        title=" {} Recorded for the 15 worst affected countries ".format(i),
    )
    fig.show()
# Row-wise ratios between pairs of world_data columns.
deaths_to_confirmed = world_data['TotalDeaths'] / world_data['TotalCases']
deaths_to_recovered = world_data['TotalDeaths'] / world_data['TotalRecovered']
tests_to_confirmed = world_data['TotalTests'] / world_data['TotalCases']
serious_to_death = world_data['Serious,Critical'] / world_data['TotalDeaths']

# One bar chart per ratio, drawn in the same order as before.
# NOTE(review): the titles say "some worst effected countries", but every row
# of world_data is plotted - confirm whether a top-N slice was intended.
for ratio_values, ratio_title in (
    (deaths_to_confirmed, "Death to confirmed ratio of some worst effected countries"),
    (deaths_to_recovered, "Death to recovered ratio of some worst effected countries"),
    (tests_to_confirmed, "Tests to confirmed ratio of some worst effected countries"),
    (serious_to_death, "serious to Death ratio of some worst effected countries"),
):
    fig = px.bar(world_data, x='Country/Region', y=ratio_values)
    fig.update_layout(
        title={'text': ratio_title, 'xanchor': 'left'},
        template="plotly_dark",
    )
    fig.show()
# Preview the first rows of the per-country daily time series.
group_data.head()
| Date | Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Eastern Mediterranean |
| 1 | 2020-01-22 | Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 2 | 2020-01-22 | Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
| 3 | 2020-01-22 | Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Europe |
| 4 | 2020-01-22 | Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Africa |
from plotly.subplots import make_subplots ## for creating subplots in plotly
import plotly.graph_objects as go
def country_visualization(group_data, country):
    """Show the confirmed, active, recovered and death curves of one country.

    Keeps the rows of ``group_data`` whose 'Country/Region' equals
    ``country`` and displays a one-row, four-column figure with one scatter
    trace per case category plotted against 'Date'.

    Args:
        group_data: DataFrame with 'Country/Region', 'Date', 'Confirmed',
            'Deaths', 'Recovered' and 'Active' columns.
        country: Value to match in the 'Country/Region' column.
    """
    country_rows = group_data.loc[
        group_data['Country/Region'] == country,
        ['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active'],
    ]
    # Left-to-right panel order; each name is the subplot title, the trace
    # name and the column that is plotted.
    panels = ("Confirmed", "Active", "Recovered", 'Deaths')
    fig = make_subplots(rows=1, cols=len(panels), subplot_titles=panels)
    for position, panel in enumerate(panels, start=1):
        fig.add_trace(
            go.Scatter(name=panel, x=country_rows['Date'], y=country_rows[panel]),
            row=1,
            col=position,
        )
    fig.update_layout(
        height=500,
        width=1000,
        title_text=f"Date Vs Recorded Cases of {country}",
        template="plotly_dark",
    )
    fig.show()
# Render the four-panel figure for each country of interest, in this order.
for selected_country in ('Brazil', 'US', "India"):
    country_visualization(group_data, selected_country)